from .models import WordEntry

# Lookup table for gender, animacy, number and aspect labels.
# Each key is a label string as it appears in a raw tag; the value is either
# a single tag string or a list of tag strings that the label expands to.
# This table is merged into TAGS further down in this module.
GENDER_TAGS = {
    # Modul:gender_and_number/data
    "m": "masculine",
    "f": "feminine",
    "n": "neuter",
    "c": "common",
    "neutral": "neutral",
    "bernyawa": "animate",
    "tak bernyawa": "inanimate",
    "haiwan": "animal-not-person",
    "peribadi": "personal",
    "tak peribadi": "impersonal",
    "vir": "virile",
    "nvir": "nonvirile",
    # NOTE(review): "mf" maps to "singular" (not masculine/feminine) and "jm"
    # to "plural" - presumably abbreviations of the Malay number terms;
    # confirm against the label data page named above.
    "mf": "singular",
    "du": "dual",
    "jm": "plural",
    "impf": "imperfective",
    "pf": "perfective",
    # Both labels below expand to the same pair of tags.
    "takrifan sama": ["masculine", "feminine"],
    "mengikut keadaan": ["masculine", "feminine"],
}

# Lookup table for usage/register/grammar labels.
# Each key is a label string as it appears in a raw tag; the value is either
# a single tag string or a list of tag strings.  Commented-out entries are
# labels that have no translation assigned yet.
# This table is merged last into TAGS further down in this module.
LB_TAGS = {
    # Modul:labels/data
    "kependekan": "abbreviation",
    "akronim": "acronym",
    "transitif": "transitive",
    "tidak transitif": "intransitive",
    "jussive": "jussive",
    "arkaik": "archaic",
    "atelic": "imperfective",
    "kata bantu": ["auxiliary", "verb"],
    "nombor kardinal": "cardinal",
    "kausatif": "causative",
    # "berbilang": "",
    "kebudak-budakan": "childish",
    "chữ Nôm Vietnam": ["Chữ-Nôm", "Vietnam"],
    "hinaan": "offensive",
    "hinaan kaum": ["ethnic", "offensive"],
    "eufemisme": "euphemistic",
    "kiasan": "figuratively",
    "jenaka": "humorous",
    "tidak formal": "informal",
    "ironi": "ironic",
    "harfiah": "literally",
    # The domain-specific "slanga ..." labels all collapse to plain "slang";
    # the trailing comment records the domain that is dropped.
    "slanga perubatan": "slang",  # medicine
    "metonim": "metonymically",
    "neologisme": "neologism",
    "bentuk bukan baku": "nonstandard",
    "usang": "obsolete",
    "lapuk": "obsolete",
    "kata kasar": "impolite",
    "sopan": "polite",
    "pasca-Klasik": "post-Classical",
    "slanga penjara": "slang",  # prison
    # "hina agama": "",
    "slanga": "slang",
    "slanga sekolah": "slang",  # school
    # "hina diri": "",
    "slanga universiti": "slang",  # university
    "sinkop": "syncope",
    # "teknikal": "",  # technical
    "slanga mesej": "slang",  # message
    "lucah": "vulgar",
    "Amerika": "America",
    # "Politik Malaysia" is also a key of TOPICS, so translate_raw_tags can
    # emit both the tag "Malaysia" and the topic "politics" for this label.
    "Politik Malaysia": "Malaysia",
    "retorik": "rhetoric",
    "Kesatuan Soviet": "Soviet Union",
    "peribahasa": "proverb",
}

# Lookup table for labels that (judging by the table name) come from
# part-of-speech header lines - TODO confirm where these labels are collected.
# Each key is a label string; the value is a tag string or a list of tags.
# This table is merged into TAGS further down in this module.
POS_HEADER_TAGS = {
    "ejaan Jawi": "Jawi",
    "genitif": "genitive",
    "jamak": "plural",
    "terbilang dan tidak terbilang": ["countable", "uncountable"],
    "bentuk jamak": "plural",
    "Ejaan bahasa Urdu": "Urdu",
    "partitif": "partitive",
}

# Lookup table for pronunciation-related labels (accent and romanisation
# names).  Each key is a label string; the value is the tag it maps to.
# This table is merged into TAGS further down in this module.
SOUND_TAGS = {
    "Received Pronunciation": "Received-Pronunciation",
    "General American": "General-American",
    "UK": "UK",
    # Same tag as "General American" above.
    "A.S.": "General-American",
    "Pinyin": "Pinyin",
    "Wade-Giles": "Wade-Giles",
}


# Combined label -> tag(s) lookup consulted by translate_raw_tags.
# The tables are merged in the order listed; if a label occurs in more than
# one table, the value from the later table is the one that is kept.
TAGS = {
    label: tags
    for table in (GENDER_TAGS, POS_HEADER_TAGS, SOUND_TAGS, LB_TAGS)
    for label, tags in table.items()
}


# Lookup table for topical (subject-area) labels.
# Each key is a label string as it appears in a raw tag; the value is either
# a single topic string or a list of topic strings.  translate_raw_tags
# appends these values to an entry's ``topics`` list.
# NOTE(review): the values "computer graphics", "computer hardware",
# "signal processing" and "web design" contain a space, whereas every other
# multi-word topic in this table is hyphenated - confirm whether that is
# intended.
TOPICS = {
    # Modul:labels/data/topical
    "perakaunan": "accounting",
    "akustik": "acoustics",
    "lakonan": "acting",
    "periklanan": "advertising",
    "aeronautik": "aeronautics",
    "pertanian": "agriculture",
    "alkimia": "alchemy",
    "alkohol": "beverages",
    "algebra": "algebra",
    "geometri algebra": ["geometry", "algebra"],
    "perubatan alternatif": "alternative-medicine",
    "bola sepak Amerika": "American-football",
    "biokimia": "biochemistry",
    # "analisis": "analysis",
    "analytic geometry": "geometry",
    "kimia analisis": "chemistry",
    "anarkisme": "anarchism",
    "anatomi": "anatomy",
    "animasi": "anime",
    "anime": "anime",
    "antropologi": "anthropology",
    "araknologi": "arachnology",
    "arkeologi": "archeology",
    "memanah": "archery",
    "seni bina": "architecture",
    "mitologi Armenia": ["Armenia", "mythology"],
    "kecerdasan buatan": "artificial-intelligence",
    "seni": "arts",
    "uranography": "uranography",
    "astrologi": "astrology",
    "astronautik": "astronautics",
    "astronomi": "astronomy",
    "astrofizik": "astrophysics",
    "mitologi Asturia": ["Asturia", "mythology"],
    "olahraga": "sports",
    "auto racing": "racing",
    "automotif": "automotive",
    "penerbangan": "aviation",
    "backgammon": "backgammon",
    "bakteriologi": "bacteriology",
    "badminton": "badminton",
    "permainan bola": "ball-games",
    "balet": "ballet",
    "perbankan": "banking",
    "besbol": "baseball",
    # NOTE(review): key is spelled "kerangjang"; possibly a typo for
    # "keranjang" - confirm against the label data this table mirrors.
    "bola kerangjang": "basketball",
    "BDSM": "BDSM",
    "beekeeping": "beekeeping",
    "perjudian": "gambling",
    "Alkitab": ["Christianity", "biblical", "religion"],
    "biblical": "biblical",
    "billiards": "billiards",
    "bingo": "bingo",
    "biologi": "biology",
    "bioteknologi": "biotechnology",
    "birdwatching": "birdwatching",
    "blogging": "blogging",
    "permainan papan": "board-games",
    "board sports": "board-games",
    "bina badan": "bodybuilding",
    "botani": "botany",
    "boling": "bowling",
    "tinju": "boxing",
    "brewing": "brewing",
    "bridge": "bridge",
    "penyiaran": "broadcasting",
    "briologi": "bryology",
    "Buddhisme": "Buddhism",
    "bullfighting": "bullfighting",
    "perniagaan": "commerce",
    "kalkulus": "calculus",
    "Canadian football": "football",
    "zoologi anjing": ["zoology", "dogs"],
    "kardiologi": "cardiology",
    "cartography": "cartography",
    "category theory": "category-theory",
    "caving": "caving",
    "Celtic mythology": "Celtic-mythology",
    "seramik": "ceramics",
    "cheerleading": "cheerleading",
    "kejuruteraan kimia": "chemistry-engineering",
    "kimia": "chemistry",
    "catur": "chess",
    "mitologi Cina": "Chinese-mythology",
    "Kristian": "Christianity",
    "cinematography": "cinematography",
    "mekanik klasik": "classical-mechanics",
    "classical studies": "classical-studies",
    "climatology": "climatology",
    "climbing": "climbing",
    "clinical psychology": "clinical-psychology",
    "combinatorics": "combinatorics",
    "comedy": "comedy",
    "komik": "comics",
    "komunikasi": "communications",
    "komunisme": "communism",
    "analisis kompleks": "complex-analysis",
    "permainan komputer": "computer-games",
    "grafik komputer": "computer graphics",
    "perkakasan komputer": "computer hardware",
    "sains komputer": "computer-sciences",
    "pengkomputan": "computing",
    "computing theory": "computing-theory",
    "conchology": "conchology",
    "pembinaan": "construction",
    "memasak": "cooking",
    "hak cipta": "copyright",
    "kosmetik": "cosmetics",
    "kriket": "cricket",
    "criminology": "criminology",
    "cryptography": "cryptography",
    "cryptozoology": "cryptozoology",
    "crystallography": "crystallography",
    "curling": "curling",
    "numismatik": "numismatics",
    "cycling": "cycling",
    "cytology": "cytology",
    "dance": "dance",
    "darts": "darts",
    "pangkalan data": "databases",
    "demoscene": "demoscene",
    "pergigian": "dentistry",
    "dermatologi": "dermatology",
    "diplomasi": "diplomacy",
    "pathology": "pathology",
    "diving": "diving",
    "domino": "dominoes",
    "drama": "drama",
    "dressage": "dressage",
    "penghasilan makanan": "food-manufacture",
    # "sains bumi": "",
    "ekologi": "ecology",
    "ekonomi": "economy",
    "pendidikan": "education",
    "kejuruteraan elektrik": "electrical-engineering",
    "keelektrikan": "electricity",
    "keelektromagnetan": "electromagnetism",
    "elektronik": "electronic",
    "embriologi": "embryology",
    "perubatan kecemasan": "emergency-medicine",
    "kejuruteraan": "engineering",
    "entomologi": "entomology",
    "enzim": "enzyme",
    "epidemiologi": "epidemiology",
    "epistemologi": "epistemology",
    "etika": "ethics",
    "etnografi": "ethnography",
    "senaman": "exercise",
    "falconry": "falconry",
    "fesyen": "fashion",
    "kimia organik": "organic-chemistry",
    "filem": "film",
    "kewangan": "finance",
    "memancing": "fishing",
    "dinamik bendalir": "fluid-dynamics",
    "perhutanan": "forestry",
    "perabot": "furniture",
    "genealogi": "genealogy",
    "genetik": "genetics",
    "geografi": "geography",
    "geologi": "geology",
    "geometri": "geometry",
    "geomorfologi": "geomorphology",
    "gerontologi": "gerontology",
    "golf": "golf",
    "kerajaan": "government",
    "tatabahasa": "grammar",
    "gimnastik": "gymnastics",
    "ginekologi": "gynecology",
    "hematologi": "hematology",
    "Hinduisme": "Hinduism",
    "historiografi": "historiography",
    "sejarah": "history",
    "hoki": "hockey",
    "homeopati": "homeopathy",
    "hormon": "hormone",
    "lumba kuda": "horse-racing",
    "horticulture": "horticulture",
    "sumber manusia": "human-resources",
    "kemanusiaan": "humanity",
    "perburuan": "hunting",
    "hidrologi": "hydrology",
    "hoki ais": "ice-hockey",
    "imunokimia": "immunochemistry",
    "imunologi": "immunology",
    "sains maklumat": "information-science",
    "teori maklumat": "information-theory",
    "fizik": "physics",
    "kewartawanan": "journalism",
    "judo": "judo",
    "undang-undang": "law",
    "leksikografi": "lexicography",
    "likenologi": "lichenology",
    "limnologi": "limnology",
    "linguistik": "linguistics",
    "kesusasteraan": "literature",
    "logik": "logic",
    "malakologi": "malacology",
    "pemasaran": "marketing",
    "Marxisme": "Marxism",
    "sains bahan": "material-science",
    "matematik": "mathematics",
    "mekanik": "mechanics",
    "perubatan": "medicine",
    "metalurgi": "metallurgy",
    "meteorologi": "meteorology",
    "metrologi": "metrology",
    "mikrobiologi": "microbiology",
    "ketenteraan": "military",
    "mineralogi": "mineralogy",
    "perlombongan": "mining",
    "wang": "money",
    "otot": "muscle",
    "muzik": "music",
    "alat muzik": "musical-instrument",
    "mikologi": "mycology",
    "mitologi": "mythology",
    "nanoteknologi": "nanotechnology",
    "nautika": "nautical",
    "Nazisme": "Nazism",
    "neuroanatomi": "neuroanatomy",
    "neurologi": "neurology",
    "neurosains": "neuroscience",
    "fizik nuklear": "nuclear-physics",
    "teori nombor": "number-theory",
    "oseanografi": "oceanography",
    "onkologi": "oncology",
    "permainan dalam talian": "video-games",
    "optik": "optics",
    "sebatian organik": "organic-compound",
    "ornitologi": "ornithology",
    "ortodontik": "orthodontics",
    "paleontologi": "paleontology",
    "parapsikologi": "parapsychology",
    "fizik zarah": "particle-physics",
    "pempasteuran": "pasteurization",
    "patologi": "pathology",
    "petrokimia": "petrochemical",
    "petrologi": "petrology",
    "farmakologi": "pharmacology",
    "farmasi": "pharmacy",
    "filateli": "philately",
    "falsafah": "philosophy",
    "fonetik": "phonetics",
    "fonologi": "phonology",
    "fotografi": "photography",
    "kimia fizik": ["physics", "chemistry"],
    "fisiologi": "physiology",
    "planetologi": "planetology",
    "toksikologi": "toxicology",
    "sains politik": "political-science",
    "politik": "politics",
    # Also a key of LB_TAGS (mapped there to the tag "Malaysia").
    "Politik Malaysia": "politics",
    "pornografi": "pornography",
    "percetakan": "printing",
    "teori kebarangkalian": "probability-theory",
    "pengaturcaraan": "programming",
    "undang-undang hartanah": ["real-estate", "law"],
    "psikiatri": "psychiatry",
    "psikoanalisis": "psychoanalysis",
    "psikologi": "psychology",
    "psikoterapi": "psychotherapy",
    "penerbitan": "publishing",
    "mekanik kuantum": "quantum-mechanics",
    "pengangkutan rel": "rail-transport",
    "agama": "religion",
    "robotik": "robotics",
    "Roman Katolik": "Roman-Catholicism",
    "mitologi Rom": "Roman-mythology",
    "ragbi": "rugby",
    "cereka sains": "science-fiction",
    "sains": "sciences",
    "seismologi": "seismology",
    "semantik": "semantics",
    "semiotik": "semiotics",
    "teori set": "set-theory",
    "menjahit": "sewing",
    "keseksualan": "sexuality",
    "pemprosesan isyarat": "signal processing",
    "menyanyi": "singing",
    "snuker": "snooker",
    "bola sepak": "soccer",
    "sains sosial": "social-science",
    "sosialisme": "socialism",
    "media sosial": "social-media",
    "sosiolinguistik": "sociolinguistics",
    "sosiologi": "sociology",
    "bola lisut": "softball",
    "perisian": "software",
    "kejuruteraan perisian": "software-engineering",
    "sains tanah": "soil-science",
    "bunyi": "sound",
    "kejuruteraan bunyi": "sound-engineering",
    "sains angkasa": "space-science",
    "spektroskopi": "spectroscopy",
    "sukan": "sports",
    "skuasy": "squash",
    "statistik": "statistics",
    "pasaran saham": "stock-market",
    "subbudaya": "subculture",
    "kesufian": "Sufism",
    "pembedahan": "surgery",
    "berenang": "swimming",
    "teori sistem": "systems-theory",
    "percukaian": "taxation",
    "taksonomi": "taxonomy",
    "teknologi": "technology",
    "telekomunikasi": "telecommunications",
    "televisyen": "television",
    "tenis": "tennis",
    "tekstil": "textiles",
    "teater": "theater",
    "teologi": "theology",
    "termodinamik": "thermodynamics",
    "topologi": "topology",
    "pelancongan": "tourism",
    "perdagangan": "commerce",
    "pengangkutan": "transport",
    "trigonometri": "trigonometry",
    "tipografi": "typography",
    "kenderaan": "vehicles",
    "perubatan veterinar": ["veterinary", "medicine"],
    "genre permainan video": "video-games",
    "permainan video": "video-games",
    "virologi": "virology",
    "volkanologi": "volcanology",
    "bola tampar": "volleyball",
    "senjata": "weapon",
    "cuaca": "weather",
    "reka bentuk web": "web design",
    "angkat berat": "weightlifting",
    "wain": "wine",
    "pertukangan kayu": "carpentry",
    "gusti": "wrestling",
    "Islam": "Islam",
}


def translate_raw_tags(data: WordEntry) -> None:
    """Translate the labels in ``data.raw_tags`` into tags and topics in place.

    Every raw tag is looked up in ``TAGS`` and in ``TOPICS``.  A hit in
    ``TAGS`` is appended to ``data.tags`` and a hit in ``TOPICS`` is appended
    to ``data.topics``; a table value that is a list contributes all of its
    items.  A lookup only counts when ``data`` actually has the corresponding
    attribute.  A raw tag can match both tables and is then added to both
    lists.

    Args:
        data: Object whose ``raw_tags`` list is read and then replaced by the
            list of raw tags that were not translated, in their original
            order.

    Returns:
        None.  ``data`` is modified in place.
    """
    # (lookup table, name of the attribute on ``data`` that receives hits)
    lookups = ((TAGS, "tags"), (TOPICS, "topics"))
    untranslated = []
    for label in data.raw_tags:
        matched = False
        for table, field_name in lookups:
            if label not in table or not hasattr(data, field_name):
                continue
            matched = True
            translation = table[label]
            if isinstance(translation, str):
                getattr(data, field_name).append(translation)
            elif isinstance(translation, list):
                getattr(data, field_name).extend(translation)
        if not matched:
            # Keep labels nothing could translate so they are not lost.
            untranslated.append(label)
    data.raw_tags = untranslated
